tensorflow-object-detection:物件辨識-3-模型實際使用(Day 19)

2024 iThome 鐵人賽

DAY 19

AI/ ML & Data

從0開始的影像辨識之路系列第 20 篇

16th鐵人賽

max

2024-08-19 12:08:55

387 瀏覽

分享至

本次主題是以 Colab 的環境進行學習的。在本篇文章中，我將講解影像辨識的基礎技能；這些技能在接下來的文章中將多次出現，先讀過這些語法再繼續去看後面的文章，會比較能快速上手喔。依照進度，每個禮拜都會記錄不同的影像辨識方法，基本順序如下:

OpenCV
圖片分類(Tensorflow-Image classification)
語意分割(Semantic Segmentation)
生成模仿圖片(CycleGAN and pix2pix in PyTorch)
物件辨識(tensorflow object detection)
額外分享(MediaPipe)

文章順序有更改，主要原因是因為模型訓練部分內容有點多，所以我先講解實際應用，如果最後還有時間我會再將模型訓練補上。

一開始先去下載已訓練好的模型，模型載好之後，就可以開始預測了。這是我自己訓練的模型，能預測汽車、摩托車、腳踏車還有人。
模型載入及預測:

import datetime
import glob
import os
import pathlib
import shutil
import sys
import tarfile
import urllib
import urllib.request
import zipfile

import cv2 as cv
import matplotlib.image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import tensorflow as tf
from IPython.display import clear_output
from IPython.display import display
from matplotlib import gridspec
from PIL import Image
from PIL import ImageOps
from skimage import io
from tensorflow import keras
from tensorflow.keras.layers import BatchNormalization, Conv2DTranspose, Concatenate
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Activation, ReLU
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Ask for the URL of the image to analyse; anything that is not an http(s)
# URL (including an empty answer) falls back to a sample picture.
image_url = input('enter URL').strip()
if not image_url.startswith(('http://', 'https://')):
    image_url = 'https://robbreport.com/wp-content/uploads/2024/04/RR_50_Most_Expensive_Cars_Update_Lead.jpg?w=1000'

# URL split on "/": filenames[-1] is the last path segment (it may still
# carry a "?query" suffix, which later code strips).
filenames = image_url.split("/")

# Download the raw bytes; the context manager closes the HTTP response.
with urllib.request.urlopen(image_url) as url_response:
    img_array = np.frombuffer(url_response.read(), dtype=np.uint8)

# IMREAD_COLOR always yields a 3-channel BGR array, so the channel flip
# `[:, :, ::-1]` applied later is valid even for grayscale or RGBA sources.
img = cv.imdecode(img_array, cv.IMREAD_COLOR)
if img is None:
    raise ValueError(f"Could not decode an image from {image_url!r}")
import time
import numpy as np

# --- Run configuration ------------------------------------------------------
# NOTE(review): num_classes, batch_size, img_size, img_height, img_width,
# image_list, j and submit2 are not read by the detection code visible in
# this section; they look like leftovers from other notebooks - confirm
# before removing.
num_classes = 6
img_size = (160, 160)
img_height = img_width = 180
batch_size = 32  # an earlier `batch_size = 10` was overwritten by this value

image_list = "/content/drive/MyDrive/submission_end/Public_Testing_Dataset_Only_for_detection"
save_dir = "."  # base directory for the annotated output image

# Minimum score a detection needs in order to be kept.
DETECTION_THRESHOLD = 0.3

# --- Per-detection accumulators ---------------------------------------------
# run_odt_and_draw_results appends one entry to each list per kept detection:
# source name, score, mapped class code, and the box as x / y / width / height.
filename, filesc, fileclass = [], [], []
filex, filey, filew, fileh = [], [], [], []

submit2 = pd.DataFrame()
j = y = 0


# Download the trained TFLite detection model next to the notebook.
# urlretrieve returns a (local_filename, headers) tuple; unpack it so that
# model_path ends up holding the local path rather than the whole tuple.
model_path = 'https://github.com/max106051231/nspo/raw/cbf6cb739cc241af8038b9c5d664f20004569c7a/model.tflite'
model_path, _ = urllib.request.urlretrieve(model_path, "model.tflite")

# Class-id -> label-name lookup table. It is padded with '???' to a fixed
# length of 1000 so that any class id below 1000 can be indexed safely.
classes = ['???'] * 1000
label_map = 'https://raw.githubusercontent.com/max106051231/nspo/cbf6cb739cc241af8038b9c5d664f20004569c7a/labelmap.txt'
urllib.request.urlretrieve(label_map, "labelmap.txt")

# One label per line; line k names class id k. The context manager closes
# the file once it has been read.
with open("labelmap.txt", 'r', encoding="utf-8") as label_file:
    label_line = label_file.readlines()

# Strip the trailing newline of every line (otherwise it ends up in the text
# drawn on the image) and ignore lines beyond the table size instead of
# raising IndexError.
label_names = [label_name.strip() for label_name in label_line[:len(classes)]]
classes[:len(label_names)] = label_names

# One random colour per table entry, used for the boxes and label text.
COLORS = np.random.randint(0, 255, size=(len(classes), 3), dtype=np.uint8)

def preprocess_image(image_path, input_size):
  """Load an image file and turn it into a batched float32 model input.

  NOTE: this function is defined a second time further down in the file;
  the later definition is the one in effect when the script runs.

  Args:
    image_path: Path of the image file to read.
    input_size: (height, width) the image is resized to.

  Returns:
    A tuple (resized_img, original_image): resized_img is the resized image
    with a leading batch axis added, original_image is the decoded image at
    its original resolution. Both are float32 tensors.
  """
  img = tf.io.read_file(image_path)
  # expand_animations=False makes every format (GIF included) decode to a
  # 3-D tensor, so adding the batch axis below always gives a 4-D input.
  img = tf.io.decode_image(img, channels=3, expand_animations=False)
  # Integer pixel values are rescaled to floats in [0, 1] by this conversion.
  img = tf.image.convert_image_dtype(img, tf.float32)
  original_image = img
  # tf.image.resize already returns float32, so no extra cast is needed.
  resized_img = tf.image.resize(img, input_size)
  resized_img = resized_img[tf.newaxis, :]
  return resized_img, original_image

def detect_objects(interpreter, image, threshold):
  """Run the model's signature on one image and keep confident detections.

  Args:
    interpreter: Object exposing get_signature_runner(); the returned
      callable is invoked as runner(input=image) and must return a mapping
      with the keys 'output_0' (detection count), 'output_1' (scores),
      'output_2' (class ids) and 'output_3' (boxes, four values each).
    image: Input passed unchanged to the signature runner.
    threshold: Minimum score (inclusive) for a detection to be returned.

  Returns:
    A list of dicts with the keys 'bounding_box', 'class_id' and 'score',
    one per detection whose score is at least `threshold`.
  """
  signature_fn = interpreter.get_signature_runner()
  output = signature_fn(input=image)

  count = int(np.squeeze(output['output_0']))
  # Flatten explicitly instead of np.squeeze: squeeze would collapse a
  # single-slot output to a 0-d array and make the indexing below fail.
  # NOTE(review): this assumes a batch of one image - confirm for the model.
  scores = np.reshape(output['output_1'], -1)
  class_ids = np.reshape(output['output_2'], -1)
  boxes = np.reshape(output['output_3'], (-1, 4))

  # Never read past the number of slots the model actually returned.
  count = min(count, len(scores))

  return [
      {
          'bounding_box': boxes[i],
          'class_id': class_ids[i],
          'score': scores[i],
      }
      for i in range(count)
      if scores[i] >= threshold
  ]

# Maps the model's raw class id to the code recorded in `fileclass`;
# ids without an entry are recorded as 'x'.
_CLASS_ID_TO_CODE = {0: 2, 1: 4, 3: 3, 7: 1, 5: 1, 2: 1}


def run_odt_and_draw_results(image_path, interpreter, threshold=0.5):
  """Detect objects in an image file, record them and draw them.

  Relies on module-level state: the boxes and labels are drawn in place on
  the global `image_nn`, the global `line` is recorded as the source name,
  and one entry per detection is appended to the global lists `filex`,
  `filey`, `filew`, `fileh`, `filename`, `fileclass` and `filesc`.

  Args:
    image_path: Path of the image file fed to the model.
    interpreter: TFLite interpreter with tensors already allocated.
    threshold: Minimum score for a detection to be kept.

  Returns:
    The annotated `image_nn` converted to uint8.
  """
  # The original code referenced input_height / input_width without ever
  # defining them; take them from the model's input tensor instead.
  # NOTE(review): assumes the input shape is (batch, height, width, channels).
  _, input_height, input_width, _ = interpreter.get_input_details()[0]['shape']

  preprocessed_image, _ = preprocess_image(
      image_path,
      (input_height, input_width)
    )

  results = detect_objects(interpreter, preprocessed_image, threshold=threshold)

  original_image_np = image_nn
  img_h, img_w = original_image_np.shape[0], original_image_np.shape[1]
  for obj in results:
    # Box values are treated as fractions of the image size, ordered
    # ymin, xmin, ymax, xmax; scale them to pixel coordinates.
    ymin, xmin, ymax, xmax = obj['bounding_box']
    xmin = int(xmin * img_w)
    xmax = int(xmax * img_w)
    ymin = int(ymin * img_h)
    ymax = int(ymax * img_h)

    # Record the detection as x / y / height / width plus its source name.
    filex.append(xmin)
    filey.append(ymin)
    fileh.append(ymax - ymin)
    filew.append(xmax - xmin)
    filename.append(line)

    class_id = int(obj['class_id'])
    fileclass.append(_CLASS_ID_TO_CODE.get(class_id, 'x'))
    filesc.append(obj['score'])

    color = [int(c) for c in COLORS[class_id]]
    cv.rectangle(original_image_np, (xmin, ymin), (xmax, ymax), color, 2)
    # Put the label above the box, or just below its top edge when the box
    # is too close to the top of the image for the text to fit.
    text_y = ymin - 15 if ymin - 15 > 15 else ymin + 15
    # strip() drops any trailing newline left in the label name.
    label = "{}: {:.0f}%".format(classes[class_id].strip(), obj['score'] * 100)
    cv.putText(original_image_np, label, (xmin, text_y),
        cv.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

  return original_image_np.astype(np.uint8)





def preprocess_image(image_path, input_size):
  """Load an image file and turn it into a batched float32 model input.

  NOTE: this re-defines the function of the same name that appears earlier
  in the file; being the later definition, it is the one in effect when the
  script runs.

  Args:
    image_path: Path of the image file to read.
    input_size: (height, width) the image is resized to.

  Returns:
    A tuple (resized_img, original_image): resized_img is the resized image
    with a leading batch axis added, original_image is the decoded image at
    its original resolution. Both are float32 tensors.
  """
  img = tf.io.read_file(image_path)
  # expand_animations=False makes every format (GIF included) decode to a
  # 3-D tensor, so adding the batch axis below always gives a 4-D input.
  img = tf.io.decode_image(img, channels=3, expand_animations=False)
  # Integer pixel values are rescaled to floats in [0, 1] by this conversion.
  img = tf.image.convert_image_dtype(img, tf.float32)
  original_image = img
  # tf.image.resize already returns float32, so no extra cast is needed.
  resized_img = tf.image.resize(img, input_size)
  resized_img = resized_img[tf.newaxis, :]
  return resized_img, original_image

# Run detection once on the downloaded image and show it before and after.
#
# The original looped with `for line in filenames[-1]`, which iterates over
# the *characters* of the file name and broke out after the first one; the
# single image is processed directly here instead.

# Local file name: last URL segment without its "?query" part.
line = filenames[-1].split("?")[0]
if not os.path.splitext(line)[1]:
    # cv.imwrite chooses the encoder from the file extension, so make sure
    # there is one (also covers URLs that end with "/").
    line = (line or "input_image") + ".jpg"
images = line.split(".")[0]

image_np = img
# `line` and `image_nn` are read as module-level globals by
# run_odt_and_draw_results, which draws the detections onto image_nn.
# The [:, :, ::-1] flip reverses the channel order (BGR -> RGB) for matplotlib.
image_nn = cv.resize(image_np[:, :, ::-1], (1920, 1080))
plt.imshow(image_nn)
plt.title("Before identification is complete")
plt.show()

# preprocess_image loads the picture from disk, so save the download first.
cv.imwrite(line, image_np)

interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()

detection_result_image = run_odt_and_draw_results(
        line,
        interpreter,
        threshold=DETECTION_THRESHOLD
)

# cv.imwrite does not create missing directories, and it expects BGR channel
# order while the annotated image is RGB.
output_dir = os.path.join(save_dir, "object_detections")
os.makedirs(output_dir, exist_ok=True)
cv.imwrite(os.path.join(output_dir, images + '.png'),
           cv.cvtColor(detection_result_image, cv.COLOR_RGB2BGR))

# The third positional argument of cv.resize is `dst`, so the interpolation
# mode has to be passed by keyword.
detection_result_image = cv.resize(detection_result_image, (1920, 1080),
                                   interpolation=cv.INTER_CUBIC)
plt.imshow(detection_result_image)
plt.title("After identification is complete")
plt.show()

原始圖片: